This analysis seeks to quantify the harm caused, measured as the travel time and distance cost incurred, in this new old world. Intuitively, someone seeking care in PA would have an easier time crossing state lines for care than someone in LA. This is an attempt to wrap our heads around these inequities.
Full success would be a model that helps focus resources where they most reduce the net cost/harm incurred. For example, while establishing protections in ID would be a good thing, it wouldn't help nearly as many people as establishing protections in TN.
If you're interested in executing the notebook or playing with the source: https://github.com/parquar/us-census-zips-geo/tree/dev/post_roe
import pandas as pd
import plotly.express as px
from lib import load_k_closest_clinic_distances
from helpers import zip3_census, draw_status_treemap
# Render the status treemap provided by helpers.
# NOTE(review): presumably an overview of per-state status -- confirm in helpers.draw_status_treemap.
draw_status_treemap()
# adds drive miles and drive time from google distance matrix api
# Load the origin -> clinic distance table. Judging by the displayed output,
# each row pairs one origin with one clinic and carries geodesic_miles,
# drive_miles, drive_duration and _roundtrip_hours.
closest_clinics_from_at_risk_origins = load_k_closest_clinic_distances()
# Bare expression so the notebook displays the frame.
closest_clinics_from_at_risk_origins
| origin_lat | origin_lng | origin_state | dest_lat | dest_lng | destination_state | geodesic_miles | drive_miles | drive_duration | _roundtrip_hours | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 34.315791 | -85.295125 | GA | 37.528691 | -88.453991 | IL | 283 | 380.0 | 365.0 | 12.166667 |
| 1 | 34.315791 | -85.295125 | GA | 37.540668 | -88.816118 | IL | 297 | 374.0 | 349.0 | 11.633333 |
| 2 | 34.315791 | -85.295125 | GA | 37.139667 | -89.333679 | IL | 299 | 386.0 | 376.0 | 12.533333 |
| 3 | 34.315791 | -85.295125 | GA | 37.226148 | -89.294708 | IL | 301 | 385.0 | 369.0 | 12.300000 |
| 4 | 34.315791 | -85.295125 | GA | 37.476199 | -89.032362 | IL | 302 | 377.0 | 356.0 | 11.866667 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5513 | 43.639891 | -110.731186 | WY | 42.854644 | -117.509120 | OR | 346 | 488.0 | 466.0 | 15.533333 |
| 5514 | 43.639891 | -110.731186 | WY | 40.045690 | -105.200890 | CO | 377 | 478.0 | 474.0 | 15.800000 |
| 5515 | 43.639891 | -110.731186 | WY | 39.671344 | -116.013637 | NV | 386 | 527.0 | 504.0 | 16.800000 |
| 5516 | 43.639891 | -110.731186 | WY | 40.428841 | -116.961096 | NV | 389 | 509.0 | 499.0 | 16.633333 |
| 5517 | 43.639891 | -110.731186 | WY | 38.972891 | -106.380522 | USA | 393 | 543.0 | 551.0 | 18.366667 |
5518 rows × 10 columns
def _get_distance_from_origin_stats(closest_clinics):
    """Summarise travel cost per origin and attach the zip3 census columns.

    Args:
        closest_clinics: DataFrame with one row per (origin, clinic) pair.
            Must contain ``origin_state``, ``origin_lat``, ``origin_lng``,
            ``geodesic_miles``, ``drive_miles`` and ``_roundtrip_hours``.

    Returns:
        DataFrame with one row per origin that is present in both
        ``closest_clinics`` and the module-level ``zip3_census`` table. It
        holds the ``zip3_census`` columns (with ``_state``, ``_lat``, ``_lng``
        and ``_zip3`` renamed to their ``origin_*`` equivalents) followed by:

        * ``k`` -- number of clinic rows aggregated for the origin
        * ``one_way_crow_flies_mean`` -- mean of ``geodesic_miles``
        * ``one_way_drive_miles_mean`` -- mean of ``drive_miles``
        * ``round_trip_drive_hrs_mean`` -- mean of ``_roundtrip_hours``
    """
    # The same three columns identify an origin in both tables; naming them
    # once keeps the groupby and the merge in agreement.
    origin_keys = ["origin_state", "origin_lat", "origin_lng"]

    # One row per origin, aggregating however many clinic rows it has.
    per_origin = (
        closest_clinics.groupby(origin_keys)
        .agg(
            k=("origin_lat", "count"),
            one_way_crow_flies_mean=("geodesic_miles", "mean"),
            one_way_drive_miles_mean=("drive_miles", "mean"),
            round_trip_drive_hrs_mean=("_roundtrip_hours", "mean"),
        )
        .reset_index()
    )

    # Align the census column names with the origin_* names used above.
    census_stats = zip3_census.rename(
        columns={
            "_state": "origin_state",
            "_lat": "origin_lat",
            "_lng": "origin_lng",
            "_zip3": "origin_zip3",
        }
    )

    # add population + adi
    # Join on the origin keys explicitly rather than on whatever column names
    # the two frames happen to share, so an unrelated overlapping column can
    # never silently become a join key.
    return census_stats.merge(per_origin, on=origin_keys, how="inner")
# Build the per-origin summary: aggregated distance/time stats joined with the census columns.
distance_from_origin_stats = _get_distance_from_origin_stats(closest_clinics_from_at_risk_origins)
# Bare expression so the notebook displays the result.
distance_from_origin_stats
| origin_state | origin_zip3 | origin_lat | origin_lng | _census_total | _adi_mean | k | one_way_crow_flies_mean | one_way_drive_miles_mean | round_trip_drive_hrs_mean | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AL | 350** | 33.568065 | -86.712692 | 442190 | 71.368421 | 10 | 316.4 | 401.6 | 12.230000 |
| 1 | AL | 351** | 33.506417 | -86.624839 | 324591 | 65.675000 | 10 | 322.3 | 412.6 | 12.630000 |
| 2 | AL | 352** | 33.506077 | -86.798758 | 491463 | 66.857143 | 10 | 318.5 | 395.8 | 12.096667 |
| 3 | AL | 354** | 33.113778 | -87.787003 | 225321 | 73.265625 | 10 | 328.3 | 446.4 | 13.920000 |
| 4 | AL | 355** | 33.973542 | -87.662356 | 158109 | 85.100000 | 10 | 272.4 | 366.9 | 12.033333 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 521 | WY | 823** | 41.804052 | -106.980921 | 15430 | 67.400000 | 10 | 168.0 | 231.0 | 7.973333 |
| 522 | WY | 824** | 44.438020 | -108.408179 | 52930 | 51.636364 | 10 | 349.0 | 492.4 | 15.533333 |
| 523 | WY | 825** | 43.103802 | -108.847958 | 38910 | 60.888889 | 10 | 273.8 | 366.8 | 12.453333 |
| 524 | WY | 828** | 44.714826 | -106.872537 | 36958 | 37.461538 | 10 | 351.8 | 447.8 | 13.426667 |
| 525 | WY | 830** | 43.639891 | -110.731186 | 20421 | 3.800000 | 10 | 342.8 | 454.9 | 15.386667 |
526 rows × 10 columns
# Box plot with one box per origin state, showing how the per-origin mean
# round-trip driving hours are spread within that state.
state_box_fig = px.box(
    distance_from_origin_stats,
    title="Round trip driving hours required to access care | by State",
    y="round_trip_drive_hrs_mean",
    x="origin_state",
)
state_box_fig.show(renderer="notebook")
# Number of origin rows being plotted.
n = len(distance_from_origin_stats)
# Unweighted mean of the per-origin means: every origin counts equally,
# regardless of its population.
round_trip_mean_of_means = distance_from_origin_stats['round_trip_drive_hrs_mean'].mean()
# Map of origins: marker size follows _census_total and marker colour follows
# the per-origin mean round-trip driving hours.
px.scatter_mapbox(
    distance_from_origin_stats,
    lat="origin_lat",
    lon="origin_lng",
    size_max=15,
    hover_data=["one_way_crow_flies_mean", "one_way_drive_miles_mean", "_adi_mean"],
    height=800,
    zoom=3,
    size="_census_total",
    color="round_trip_drive_hrs_mean",
    # A format spec inside the f-string replaces the nested str.format call;
    # the rendered title is unchanged.
    title=(
        f"Unprotected Origin Locations | n={n} | scaled by Population"
        f" | Color=avg round trip drive time"
        f" | round_trip_mean={round_trip_mean_of_means:.2f} hours"
    ),
).show(renderer="notebook")
# Distribution of the per-origin mean round-trip driving hours.
# `n` and `round_trip_mean_of_means` are the values computed earlier in the
# notebook, just before the scatter map.
# NOTE(review): the mean is not population-weighted, so "on average, care
# seekers" is only approximate -- confirm whether a weighted mean is intended.
px.histogram(
    distance_from_origin_stats,
    x='round_trip_drive_hrs_mean',
    # Title typo fixed ("cares seekers" -> "care seekers"); a format spec
    # replaces the nested str.format call.
    title=(
        f"On average, care seekers would need to drive "
        f"{round_trip_mean_of_means:.2f} hours round trip | from n={n} origins"
    ),
).show(renderer="notebook")
# total cost fn
from scipy.stats import zscore

# Inputs to the cost function: population, mean ADI, and mean round-trip
# driving hours for each origin.
cost_columns = ['_census_total', '_adi_mean', 'round_trip_drive_hrs_mean']
df = distance_from_origin_stats[cost_columns]
# Bare expression so the notebook displays the selected columns.
df
| _census_total | _adi_mean | round_trip_drive_hrs_mean | |
|---|---|---|---|
| 0 | 442190 | 71.368421 | 12.230000 |
| 1 | 324591 | 65.675000 | 12.630000 |
| 2 | 491463 | 66.857143 | 12.096667 |
| 3 | 225321 | 73.265625 | 13.920000 |
| 4 | 158109 | 85.100000 | 12.033333 |
| ... | ... | ... | ... |
| 521 | 15430 | 67.400000 | 7.973333 |
| 522 | 52930 | 51.636364 | 15.533333 |
| 523 | 38910 | 60.888889 | 12.453333 |
| 524 | 36958 | 37.461538 | 13.426667 |
| 525 | 20421 | 3.800000 | 15.386667 |
526 rows × 3 columns